# datasets/__init__.py
from .base import DatasetSpec
from .registry import register, load, DATASETS
from .transforms import default_preprocess

# Import built-ins to auto-register them. These imports exist for their
# side effect only (presumably each module calls `register` at import time;
# confirm in the modules themselves). The imported names are not used in this
# file, hence the F401 (unused import) suppressions.
from .synthetic import load_swiss_roll   # noqa: F401
from .h5ad_generic import load_h5ad      # noqa: F401
from .pbmc3k import load_pbmc3k          # noqa: F401
from .digits import load_digits          # noqa: F401
# Names exposed by `from datasets import *`. The individual `load_*`
# functions imported in this file are not listed, so a star-import does not
# re-export them.
__all__ = [
    "DatasetSpec",
    "register",
    "load",
    "DATASETS",
    "default_preprocess",
]

# Example usage (kept as an inert string literal; nothing below runs on import):
"""
from datasets import load, DATASETS

print("Available datasets:", sorted(DATASETS))

# Swiss-roll (no preprocessing by default beyond casting)
spec = load("swiss_roll", n_samples=5000, preprocess=False)
print(spec.name, spec.X.shape, spec.labels.shape)

# With preprocessing (e.g., pca_n=0 keeps the ambient dimensionality)
spec = load("swiss_roll", preprocess=True, pca_n=0)

# PBMC3k (requires scanpy/anndata)
spec = load("pbmc3k", preprocess=True, pca_n=50)

# Generic H5AD path
spec = load("h5ad", path="/path/to/file.h5ad", label_key="cell_type", batch_key="batch")
"""